Network visualization

MACS 40700
University of Chicago

May 15, 2017

Network

  • Network/graph
  • Nodes/vertices
  • Links/edges

Undirected graph

Directed graph

Political polarization

  • Income inequality
  • Gerrymandering/redistricting
  • Activists involved in primary elections
  • Political realignment in the South
  • Electing more partisan members to Congress
  • Shift by existing members to ideological poles
  • Increasingly partisan media

Political polarization

  • Partisanship networks in U.S. House
  • Varying incentives
  • Cooperation on roll-call votes
  • Nodes
  • Edges
  • Undirected network

Visualizing polarization

  • Cross-party pairs
  • Same-party pairs
  • Attributes

Figure 2 from The Rise of Partisanship in the U.S. House of Representatives

Force-directed placement

D3 example of a force-directed graph

Figure 2 from The Rise of Partisanship in the U.S. House of Representatives

Love Actually

Love Actually trailer

Speaker-by-scene matrix

# Number of rows in `lines` for every (scene, character) pair
by_speaker_scene <- count(lines, scene, character)
by_speaker_scene
## Source: local data frame [162 x 3]
## Groups: scene [?]
## 
## # A tibble: 162 x 3
##    scene                character     n
##    <int>                    <chr> <int>
##  1     2       Billy (Bill Nighy)     5
##  2     2      Joe (Gregor Fisher)     3
##  3     3      Jamie (Colin Firth)     5
##  4     4     Daniel (Liam Neeson)     3
##  5     4    Karen (Emma Thompson)     6
##  6     5    Colin (Kris Marshall)     4
##  7     6    Jack (Martin Freeman)     2
##  8     6       Judy (Joanna Page)     1
##  9     7    Mark (Andrew Lincoln)     4
## 10     7 Peter (Chiwetel Ejiofor)     4
## # ... with 152 more rows
library(reshape2)
# Cast the long tally into a character-by-scene matrix. `fun.aggregate = length`
# counts the rows behind each cell, so an entry is 1 when the character speaks
# in that scene and 0 otherwise. `value.var` is named explicitly so reshape2
# does not have to guess the value column (and print a message about it).
speaker_scene_matrix <- by_speaker_scene %>%
  acast(character ~ scene, value.var = "n", fun.aggregate = length)

# Peek at the top-left corner, then check the overall shape
speaker_scene_matrix[1:5, 1:5]
##                       2 3 4 5 6
## Aurelia (Lúcia Moniz) 0 0 0 0 0
## Billy (Bill Nighy)    1 0 0 0 0
## Colin (Kris Marshall) 0 0 0 1 0
## Daniel (Liam Neeson)  0 0 1 0 0
## Harry (Alan Rickman)  0 0 0 0 0
dim(speaker_scene_matrix)
## [1] 20 76

Hierarchical clustering

# Divide every row (character) by its row total so each row sums to 1
norm <- sweep(speaker_scene_matrix,
              MARGIN = 1,
              STATS = rowSums(speaker_scene_matrix),
              FUN = "/")

# Hierarchical clustering of the characters on Manhattan distance between rows
h <- norm %>%
  dist(method = "manhattan") %>%
  hclust()

ggdendro::ggdendrogram(h)

Hierarchical clustering

# Character labels arranged in the left-to-right order of the dendrogram leaves
ordering <- h[["labels"]][h[["order"]]]
ordering
##  [1] "Natalie (Martine McCutcheon)" "PM (Hugh Grant)"             
##  [3] "Aurelia (Lúcia Moniz)"        "Jamie (Colin Firth)"         
##  [5] "Daniel (Liam Neeson)"         "Sam (Thomas Sangster)"       
##  [7] "Jack (Martin Freeman)"        "Judy (Joanna Page)"          
##  [9] "Colin (Kris Marshall)"        "Tony (Abdul Salis)"          
## [11] "Billy (Bill Nighy)"           "Joe (Gregor Fisher)"         
## [13] "Mark (Andrew Lincoln)"        "Juliet (Keira Knightley)"    
## [15] "Peter (Chiwetel Ejiofor)"     "Karl (Rodrigo Santoro)"      
## [17] "Sarah (Laura Linney)"         "Mia (Heike Makatsch)"        
## [19] "Harry (Alan Rickman)"         "Karen (Emma Thompson)"

Hierarchical clustering

# Keep only the scenes in which more than one character speaks.
# The grouping by scene is stated explicitly: current dplyr returns the result
# of count() ungrouped, in which case n() would be the row count of the whole
# table and the filter would silently keep every scene.
scenes <- by_speaker_scene %>%
    group_by(scene) %>%
    filter(n() > 1) %>%        # scenes with > 1 character
    ungroup() %>%
    # renumber the remaining scenes consecutively and order the characters
    # by the clustering result
    mutate(scene = as.numeric(factor(scene)),
           character = factor(character, levels = ordering))

# One point per (scene, character) pair; the path joins the characters that
# appear together in the same scene
ggplot(scenes, aes(scene, character)) +
    geom_point() +
    geom_path(aes(group = scene))

Adjacency matrix

# Column totals of the 0/1 matrix = number of speaking characters per scene;
# keep only the scenes with fewer than 10 of them
speakers_per_scene <- colSums(speaker_scene_matrix)
non_airport_scenes <- speaker_scene_matrix[, speakers_per_scene < 10]
non_airport_scenes[seq_len(5), seq_len(5)]
##                       2 3 4 5 6
## Aurelia (Lúcia Moniz) 0 0 0 0 0
## Billy (Bill Nighy)    1 0 0 0 0
## Colin (Kris Marshall) 0 0 0 1 0
## Daniel (Liam Neeson)  0 0 1 0 0
## Harry (Alan Rickman)  0 0 0 0 0
# Character-by-character matrix: entry [i, j] counts the retained scenes in
# which both i and j speak (the diagonal is each character's own scene count)
cooccur <- tcrossprod(non_airport_scenes)
cooccur[seq_len(5), seq_len(5)]
##                       Aurelia (Lúcia Moniz) Billy (Bill Nighy)
## Aurelia (Lúcia Moniz)                     5                  0
## Billy (Bill Nighy)                        0                  6
## Colin (Kris Marshall)                     0                  0
## Daniel (Liam Neeson)                      0                  0
## Harry (Alan Rickman)                      0                  0
##                       Colin (Kris Marshall) Daniel (Liam Neeson)
## Aurelia (Lúcia Moniz)                     0                    0
## Billy (Bill Nighy)                        0                    0
## Colin (Kris Marshall)                     6                    0
## Daniel (Liam Neeson)                      0                   11
## Harry (Alan Rickman)                      0                    0
##                       Harry (Alan Rickman)
## Aurelia (Lúcia Moniz)                    0
## Billy (Bill Nighy)                       0
## Colin (Kris Marshall)                    0
## Daniel (Liam Neeson)                     0
## Harry (Alan Rickman)                    10

Adjacency matrix

# Base-graphics heatmap of the co-occurrence matrix
heatmap(x = cooccur)

Adjacency matrix

# Tile plot of the co-occurrence matrix with both axes arranged in the
# hierarchical-clustering order.
cooccur %>%
  as_tibble() %>%
  # as_tibble() drops the matrix row names, so put them back as a column
  mutate(id1 = rownames(cooccur)) %>%
  # long format: one row per (id1, id2) pair with its count n
  pivot_longer(cols = -id1, names_to = "id2", values_to = "n") %>%
  # plain mutate() replaces the deprecated mutate_at(..., funs(...)) idiom
  mutate(id1 = factor(id1, levels = ordering),
         id2 = factor(id2, levels = ordering)) %>%
  ggplot(aes(id1, id2, fill = n)) +
  geom_tile() +
  scale_fill_continuous(low = "white", high = "red") +
  coord_fixed() +
  labs(x = NULL,
       y = NULL,
       fill = NULL) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Node-edge diagram

# Build an undirected, weighted igraph object from the co-occurrence matrix;
# the diagonal (a character with itself) is ignored
g <- cooccur %>%
  graph_from_adjacency_matrix(mode = "undirected",
                              weighted = TRUE,
                              diag = FALSE)
g
## IGRAPH UNW- 20 37 -- 
## + attr: name (v/c), weight (e/n)
## + edges (vertex names):
##  [1] Aurelia (Lúcia Moniz)--Jamie (Colin Firth)   
##  [2] Billy (Bill Nighy)   --Joe (Gregor Fisher)   
##  [3] Colin (Kris Marshall)--Mark (Andrew Lincoln) 
##  [4] Colin (Kris Marshall)--Tony (Abdul Salis)    
##  [5] Daniel (Liam Neeson) --Karen (Emma Thompson) 
##  [6] Daniel (Liam Neeson) --Sam (Thomas Sangster) 
##  [7] Harry (Alan Rickman) --Jamie (Colin Firth)   
##  [8] Harry (Alan Rickman) --Karen (Emma Thompson) 
## + ... omitted several edges

Node-edge diagram

# Node-edge diagram: edge width encodes the co-occurrence weight
ggraph(graph = g) +
  geom_edge_link(mapping = aes(edge_width = weight)) +
  geom_node_point() +
  geom_node_text(mapping = aes(label = name), size = 3, repel = TRUE) +
  scale_edge_width_continuous(range = c(0.5, 3)) +
  theme_graph() +
  theme(legend.position = "none")

R packages for networks

Layout

# The title promises the "nicely" layout, so request it by name instead of
# relying on whatever ggraph's default layout happens to resolve to.
ggraph(g, layout = "nicely") +
  geom_edge_link(aes(edge_width = weight)) +
  geom_node_point() +
  geom_node_text(aes(label = name), repel = TRUE, size = 3) +
  scale_edge_width_continuous(range = c(.5, 3)) +
  theme_graph() +
  theme(legend.position = "none") +
  ggtitle("Default layout (Nicely) algorithm")

# Same diagram, nodes placed with the "kk" layout
ggraph(graph = g, layout = "kk") +
  geom_edge_link(mapping = aes(edge_width = weight)) +
  geom_node_point() +
  geom_node_text(mapping = aes(label = name), size = 3, repel = TRUE) +
  scale_edge_width_continuous(range = c(0.5, 3)) +
  theme_graph() +
  theme(legend.position = "none") +
  labs(title = "Kamada and Kawai spring-based algorithm")

# Layers, scale and theme shared by the three "fr" plots below; only the
# layout call and the subtitle differ between them
fr_layers <- list(
  geom_edge_link(aes(edge_width = weight)),
  geom_node_point(),
  geom_node_text(aes(label = name), repel = TRUE, size = 3),
  scale_edge_width_continuous(range = c(0.5, 3)),
  theme_graph(),
  theme(legend.position = "none")
)

# No niter given: the layout runs with its default number of iterations
ggraph(g, layout = "fr") +
  fr_layers +
  labs(title = "Fruchterman-Reingold algorithm",
       subtitle = "Force-directed layout, 500 iterations")

ggraph(g, layout = "fr", niter = 100) +
  fr_layers +
  labs(title = "Fruchterman-Reingold algorithm",
       subtitle = "Force-directed layout, 100 iterations")

ggraph(g, layout = "fr", niter = 10) +
  fr_layers +
  labs(title = "Fruchterman-Reingold algorithm",
       subtitle = "Force-directed layout, 10 iterations")

# Layers, scale and theme shared by the grid and star plots
grid_star_layers <- list(
  geom_edge_link(aes(edge_width = weight)),
  geom_node_point(),
  geom_node_text(aes(label = name), repel = TRUE, size = 3),
  scale_edge_width_continuous(range = c(0.5, 3)),
  theme_graph(),
  theme(legend.position = "none")
)

ggraph(g, layout = "grid") +
  grid_star_layers +
  labs(title = "Grid algorithm")

ggraph(g, layout = "star") +
  grid_star_layers +
  labs(title = "Star algorithm")

# Linear layout; edges are drawn as arcs rather than straight links
ggraph(graph = g, layout = "linear") +
  geom_edge_arc(mapping = aes(edge_width = weight)) +
  geom_node_point() +
  geom_node_text(mapping = aes(label = name), size = 3, repel = TRUE) +
  scale_edge_width_continuous(range = c(0.5, 3)) +
  theme_graph() +
  theme(legend.position = "none") +
  labs(title = "Linear algorithm")

# Circular variant of the linear layout. The title previously read
# "Star algorithm (circular)", which did not match the layout being drawn.
ggraph(g, layout = "linear", circular = TRUE) +
  geom_edge_arc(aes(edge_width = weight)) +
  geom_node_point() +
  geom_node_text(aes(label = name), repel = TRUE, size = 3) +
  scale_edge_width_continuous(range = c(.5, 3)) +
  theme_graph() +
  theme(legend.position = "none") +
  ggtitle("Linear algorithm (circular)")

Source: Introduction to ggraph: Layouts

Dendrograms

# Hierarchical clustering of the first four iris columns, as a dendrogram
dendrogram <- iris[, 1:4] %>%
  dist() %>%
  hclust() %>%
  as.dendrogram()

# Standard dendrogram with elbow-shaped edges
ggraph(graph = dendrogram, layout = "dendrogram") +
  geom_edge_elbow()

# Same tree drawn circularly; coord_fixed() keeps the aspect ratio at 1
ggraph(graph = dendrogram, layout = "dendrogram", circular = TRUE) +
  geom_edge_elbow() +
  coord_fixed()

Nodes

# Nodes drawn with a plain ggplot2 geom mapped to the layout's x and y columns
ggraph(graph = g, layout = "kk") +
  geom_point(mapping = aes(x = x, y = y))

# Same layout with edges, using the ggraph node geom instead
ggraph(graph = g, layout = "kk") +
  geom_edge_link() +
  geom_node_point()

create_layout()

# Show the layout table that ggraph builds for the "kk" layout
create_layout(graph = g, layout = "kk")
##          x       y                         name ggraph.orig_index circular
## 1  -2.1417  1.3424        Aurelia (Lúcia Moniz)                 1    FALSE
## 2  -4.1922 -1.9677           Billy (Bill Nighy)                 2    FALSE
## 3   0.2076 -1.0916        Colin (Kris Marshall)                 3    FALSE
## 4   0.7592 -0.0822         Daniel (Liam Neeson)                 4    FALSE
## 5  -0.6400  0.1823         Harry (Alan Rickman)                 5    FALSE
## 6  -0.0503  0.6976        Jack (Martin Freeman)                 6    FALSE
## 7  -0.5979 -0.1755          Jamie (Colin Firth)                 7    FALSE
## 8  -4.4812 -1.0155          Joe (Gregor Fisher)                 8    FALSE
## 9  -0.2101  0.6793           Judy (Joanna Page)                 9    FALSE
## 10 -1.1259 -2.0090     Juliet (Keira Knightley)                10    FALSE
## 11 -0.1101 -0.1096        Karen (Emma Thompson)                11    FALSE
## 12 -0.4277 -0.0871       Karl (Rodrigo Santoro)                12    FALSE
## 13 -0.0347 -0.8263        Mark (Andrew Lincoln)                13    FALSE
## 14 -0.2749  0.1918         Mia (Heike Makatsch)                14    FALSE
## 15  0.0147  0.2879 Natalie (Martine McCutcheon)                15    FALSE
## 16 -0.2896 -2.3068     Peter (Chiwetel Ejiofor)                16    FALSE
## 17 -0.3799  0.4961              PM (Hugh Grant)                17    FALSE
## 18  0.0907  0.5504        Sam (Thomas Sangster)                18    FALSE
## 19 -0.0635 -0.4586         Sarah (Laura Linney)                19    FALSE
## 20  1.4496 -1.9517           Tony (Abdul Salis)                20    FALSE
##    ggraph.index
## 1             1
## 2             2
## 3             3
## 4             4
## 5             5
## 6             6
## 7             7
## 8             8
## 9             9
## 10           10
## 11           11
## 12           12
## 13           13
## 14           14
## 15           15
## 16           16
## 17           17
## 18           18
## 19           19
## 20           20

Nodes

# Nodes as points
ggraph(graph = g, layout = "kk") +
  geom_edge_link() +
  geom_node_point()

# Nodes as plain text
ggraph(graph = g, layout = "kk") +
  geom_edge_link() +
  geom_node_text(mapping = aes(label = name))

# Nodes as boxed labels
ggraph(graph = g, layout = "kk") +
  geom_edge_link() +
  geom_node_label(mapping = aes(label = name))

# Boxed labels with repel = TRUE
ggraph(graph = g, layout = "kk") +
  geom_edge_link() +
  geom_node_label(mapping = aes(label = name), repel = TRUE)

Communicating attributes of nodes

## IGRAPH DNW- 17 49 -- 
## + attr: name (v/c), media (v/c), media.type (v/n), type.label
## | (v/c), audience.size (v/n), type (e/c), weight (e/n)
## + edges (vertex names):
##  [1] s01->s02 s01->s03 s01->s04 s01->s15 s02->s01 s02->s03 s02->s09
##  [8] s02->s10 s03->s01 s03->s04 s03->s05 s03->s08 s03->s10 s03->s11
## [15] s03->s12 s04->s03 s04->s06 s04->s11 s04->s12 s04->s17 s05->s01
## [22] s05->s02 s05->s09 s05->s15 s06->s06 s06->s16 s06->s17 s07->s03
## [29] s07->s08 s07->s10 s07->s14 s08->s03 s08->s07 s08->s09 s09->s10
## [36] s10->s03 s12->s06 s12->s13 s12->s14 s13->s12 s13->s17 s14->s11
## [43] s14->s13 s15->s01 s15->s04 s15->s06 s16->s06 s16->s17 s17->s04

Communicating attributes of nodes

# Media network: every node is labelled with its `media` attribute
ggraph(graph = net) +
  geom_edge_link() +
  geom_node_point() +
  geom_node_label(mapping = aes(label = media), repel = TRUE) +
  theme_graph()

Color

# Node colour encodes the `type.label` attribute
ggraph(graph = net) +
  geom_edge_link() +
  geom_node_point(mapping = aes(color = type.label), size = 2) +
  geom_node_label(mapping = aes(label = media), size = 2, repel = TRUE) +
  theme_graph()

Size

# Node colour encodes `type.label`, node size encodes `audience.size`
ggraph(graph = net) +
  geom_edge_link() +
  geom_node_point(mapping = aes(size = audience.size, color = type.label)) +
  geom_node_label(mapping = aes(label = media), size = 2, repel = TRUE) +
  theme_graph()

Edges

# Node layers and theme shared by both plots; only the edge geom changes
edge_demo_nodes <- list(
  geom_node_point(aes(color = type.label, size = audience.size)),
  geom_node_label(aes(label = media), repel = TRUE, size = 2),
  theme_graph()
)

# Edges drawn with geom_edge_link()
ggraph(net) +
  geom_edge_link() +
  edge_demo_nodes

# Edges drawn with geom_edge_fan()
ggraph(net) +
  geom_edge_fan() +
  edge_demo_nodes

Decorating edges

# Node layers and theme shared by both plots; only the edge decoration changes
arrow_demo_nodes <- list(
  geom_node_point(aes(color = type.label, size = audience.size)),
  geom_node_label(aes(label = media), repel = TRUE, size = 2),
  theme_graph()
)

# Arrowheads on the edges
ggraph(net) +
  geom_edge_fan(arrow = arrow(length = unit(4, "mm"))) +
  arrow_demo_nodes

# Arrowheads plus a 3 mm circular end cap on each edge
ggraph(net) +
  geom_edge_fan(arrow = arrow(length = unit(4, "mm")),
                end_cap = circle(3, "mm")) +
  arrow_demo_nodes

Color

# Edge colour encodes the edge attribute `type`, using the default palettes
ggraph(graph = net) +
  geom_edge_fan(mapping = aes(color = type),
                arrow = arrow(length = unit(4, "mm")),
                end_cap = circle(3, "mm")) +
  geom_node_point(mapping = aes(color = type.label, size = audience.size)) +
  geom_node_label(mapping = aes(label = media), size = 2, repel = TRUE) +
  theme_graph()

# Same plot with Brewer palettes: "Paired" for nodes, "Dark2" for edges
ggraph(graph = net) +
  geom_edge_fan(mapping = aes(color = type),
                arrow = arrow(length = unit(4, "mm")),
                end_cap = circle(3, "mm")) +
  geom_node_point(mapping = aes(color = type.label, size = audience.size)) +
  geom_node_label(mapping = aes(label = media), size = 2, repel = TRUE) +
  theme_graph() +
  scale_color_brewer(palette = "Paired") +
  scale_edge_color_brewer(palette = "Dark2")